# HETEROGENEITY IN MAIN RESULTS by whether discussion was positive or negative?
# discuss_obs_per_group, at the group level
# pos_mentions, neg_mentions
crossing <- tidyr::crossing

discuss_obs %>% glimpse

# Amalgamate to the group level
discuss_obs_per_group %>% glimpse

# Merge with the r2 round
r2_w_discuss_posneg <- r2_choices_num %>%
  tidylog::left_join(discuss_obs_per_group %>% select(group_id, p_pos_mentions, p_neg_mentions), by = c("group_id")) %>%
  #   Code p_pos_mentions and p_neg_mentions as 0 if they are NA
  count_prop(phase, p_pos_mentions) %>%
  mutate(
    p_pos_mentions = ifelse(is.na(p_pos_mentions), 0, p_pos_mentions),
    p_neg_mentions = ifelse(is.na(p_neg_mentions), 0, p_neg_mentions)
  ) %>%
  mutate(
    net_mention_score = p_pos_mentions - p_neg_mentions,
    any_pos_mentions = p_pos_mentions > 0 %>% as.numeric(),
    any_neg_mentions = p_neg_mentions > 0 %>% as.numeric(),
    silence = p_pos_mentions == 0 & p_neg_mentions == 0
  ) %>%

  #   Add on R1 choices
  tidylog::left_join(r1_n_choose_trans %>% select(group_id, n_r1_choose_trans_group), by = c("group_id"))

r1_n_choose_trans


r2_w_discuss_posneg %>% count_prop(p_pos_mentions, n_r1_choose_trans_group)


# ALSO LOOK AT r2_w_discuss_het
cluster <- "group_id"

list(feols_custom(
  r2_choose_trans ~ discussion_full * (p_pos_mentions + p_neg_mentions)  + video_type_placebo + video_type_treatment + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
  data = r2_w_discuss_posneg %>% filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(r2_choose_trans)),
  fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
  cluster = cluster,
  stratum = "stratum_id",
  lasso = TRUE,
  lasso_options = list(
    potential_controls = control_vars,
    t = "discussion_full",
    interact = "pair_includes_trans",
    group_control = TRUE
  ),
  var_types = var_types_spec,
)) %>%
  tex_export(
    coef_rename = coef_label,
    gof_map = fe_label_no_fe,
    coef_omit = vars_to_regex(c("stratum_id", "Intercept", "video", "group_control", "pair_includes_trans_alt$", "phase", control_vars, "delivery")),
    dep_means = list(
      "Mean: No discussion (private) + worker is non-trans" = "discuss_type == 'control' & pair_includes_trans == FALSE",
      "Mean: No discussion (private) + worker is trans" = "discuss_type == 'control' & pair_includes_trans == TRUE"
    )
  )

r2_w_discuss_posneg %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(r2_choose_trans)) %>%
  bar_chart(x = p_pos_mentions, y = r2_choose_trans, fill = discuss_type, width = 0.25)

# Is it about choices and not communication per se? ----------------------------------------------------------------------------------------------------------------

r2_w_discuss_posneg %>% count_prop(p_pos_mentions, n_r1_choose_trans_group)

r2_w_discuss_posneg %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(r2_choose_trans)) %>%
  bar_chart(x = p_pos_mentions, y = n_r1_choose_trans_group, width = 0.25)


# Look at the individual level stuff? ----------------------------------------------------------------------------------------------------------------

# PLOT HETEROGENEITY BY DISCUSSION TYPE
r2_w_discuss_het %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(r2_choose_trans)) %>%
  count_prop(favourable_discussion_z, discuss_type) %>%
  mutate(favourable_discussion_z = ifelse(is.na(favourable_discussion_z), runif(nrow(.), -3, 1), favourable_discussion_z)) %>%
  mutate(favourable_discussion_z_group_excl = ifelse(is.na(favourable_discussion_z), runif(nrow(.), -3, 1), favourable_discussion_z)) %>%
  count_prop(spoke_pro_trans_group_excl, discuss_type) %>%
  mutate(
    spoke_pro_trans_group_excl = ifelse(is.na(spoke_pro_trans_group_excl), runif(nrow(.), 0, 1), spoke_pro_trans_group_excl)
  ) %>%
  ggplot(aes(x = spoke_pro_trans_group_excl, y = r2_choose_trans, colour = discuss_type)) +
  geom_smooth(method = "lm")


# FOR DISCUSSION FULL
r2_w_discuss_het %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>% filter(!is.na(r2_choose_trans)) %>%
  mutate(spoke_pro_trans_group_excl = ifelse(is.na(spoke_pro_trans_group_excl), 0, spoke_pro_trans_group_excl)) %>%
  bar_chart(x = spoke_pro_trans_group_excl, y = r2_choose_trans, fill = discuss_type, width = 0.1)


# FOR LISTENERS

# First, check the level of analysis of the dataset
r2_w_discuss_het %>%
  count_prop(is_listener) %>%
  filter(!is.na(is_listener)) %>%
  select(ind_id, group_id, is_listener, spoke_pro_trans_group, spoke_pro_trans_group_excl) %>%
  # for listeners, spoke_pro_trans_group is always the same as spoke_pro_trans_group_excl
  count_prop(is_listener, spoke_pro_trans_group)

# Now, analyse listeners vs control
r2_w_discuss_het %>%
  filter(discuss_type == "control" | is_listener %in% 1) %>%
  filter(!is.na(r2_choose_trans)) %>%
  mutate(spoke_pro_trans_group_excl = ifelse(is.na(spoke_pro_trans_group_excl), 0, spoke_pro_trans_group_excl)) %>%
  bar_chart(
    x = spoke_pro_trans_group_excl, y = r2_choose_trans, fill = discuss_type, width = 0.1
  )

# Regression for spoke_pro_trans_group_excl ----------------------------------------------------------------------------------------------------------------

models_spoke_pro_trans_group_excl <- list(
  "Discussion participants" =  feols_custom(
    r2_choose_trans ~ discussion_full * (spoke_pro_trans_group_excl + p_neg_mentions)  + video_type_placebo + video_type_treatment + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown  + stratum_id + delivery_incentive_exp + comparator_order_in_pair + phase,
    data = r2_w_discuss_het %>%
      filter(discuss_type %in% c("control", "discussion_full")) %>%
      filter(!is.na(r2_choose_trans)) %>%
      mutate(across(c(spoke_pro_trans_group_excl, p_neg_mentions), ~ ifelse(is.na(.x), 0, .x))),
    cluster = cluster,
    stratum = "stratum_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    ),
    var_types = var_types_spec,
  ),
  "Listeners" =  feols_custom(
    r2_choose_trans ~ discussion_pair_listener * (spoke_pro_trans_group_excl + p_neg_mentions)  + video_type_placebo + video_type_treatment + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown + stratum_id + delivery_incentive_exp + comparator_order_in_pair + phase,
    data = r2_w_discuss_het %>%
      filter(treat_type_r2 %in% c("control", "discussion_listener")) %>%
      filter(!is.na(r2_choose_trans)) %>%
      mutate(across(c(spoke_pro_trans_group_excl, p_neg_mentions), ~ ifelse(is.na(.x), 0, .x))),
    cluster = cluster,
    stratum = "stratum_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    ),
    var_types = var_types_spec,
  )
) 

models_spoke_pro_trans_group_excl %>%
  tex_export(
    coef_rename = coef_label,
    gof_map = fe_label_no_fe,
    coef_omit = vars_to_regex(c("stratum_id", "Intercept", "video", "group_control", "pair_includes_trans_alt$", "phase", control_vars, "delivery")),
    dep_means = list(
      "Mean: No discussion (private) + worker is non-trans" = "discuss_type == 'control' & pair_includes_trans == FALSE",
      "Mean: No discussion (private) + worker is trans" = "discuss_type == 'control' & pair_includes_trans == TRUE"
    )
  )

models_spoke_pro_trans_group_excl[[1]] %>% get_coeff("discussion_full") %>%
  times_100 %>%
  write_stat("outputs/stats/silent_discussion_coeff.tex", digits = 0)

models_spoke_pro_trans_group_excl[[1]] %>% get_p_val("discussion_full") %>%
  write_p_val("outputs/stats/silent_discussion_p.tex", digits = 2)

models_spoke_pro_trans_group_excl[[2]] %>% get_coeff("discussion_pair_listener") %>%
  times_100 %>%
  write_stat("outputs/stats/silent_discussion_listener_coeff.tex", digits = 0)

models_spoke_pro_trans_group_excl[[2]] %>% get_p_val("discussion_pair_listener") %>%
  write_p_val("outputs/stats/silent_discussion_listener_p.tex", digits = 2)

# Check the listener effect, restricting to P(pro-trans) == 0
model_non_linear_listener <- feols_custom(
    r2_choose_trans ~  discussion_pair_listener + p_neg_mentions + factor(spoke_pro_trans_group_excl) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_w_discuss_het %>%
      filter(treat_type_r2 %in% c("control", "discussion_listener")) %>%
      filter(!is.na(r2_choose_trans)) %>%
      mutate(across(c(spoke_pro_trans_group_excl, p_neg_mentions), ~ ifelse(is.na(.x), 0, .x))),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = cluster,
    stratum = "stratum_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_pair_listener",
      group_control = TRUE
    ),
    var_types = var_types_spec,
  )


r2_w_discuss_het %>%
      filter(treat_type_r2 %in% c("discussion_listener")) %>%
      filter(!is.na(r2_choose_trans)) %>%
      filter(spoke_pro_trans_group_excl == 0) %>% 
      select(ind_id) %>% 
      distinct()

model_non_linear_listener %>% get_coeff("discussion_pair_listener") %>%
  times_100 %>%
  write_stat("outputs/stats/silent_discussion_non_linear_listener_coeff.tex", digits = 0)

model_non_linear_listener %>% get_p_val("discussion_pair_listener") %>%
  write_p_val("outputs/stats/silent_discussion_non_linear_listener_p.tex", digits = 2)

# Correlation from just pro-trans choices in control group ----------------------------------------------------------------------------------------------------------------

r2_w_discuss_het %>% count_prop(discuss_type, n_r1_choose_trans_group, n_r1_choose_trans)

r2_w_discuss_het$p_r1_choose_trans_group


r2_w_discuss_het %>%
  filter(discuss_type == "control" | is_listener %in% 1) %>%
  filter(!is.na(r2_choose_trans)) %>%
  mutate(
    p_r1_choose_trans_group = coalesce(p_r1_choose_trans_group, p_r1_choose_trans_group_excl)
  ) %>%
  bar_chart(x = p_r1_choose_trans_group, y = r2_choose_trans, fill = discuss_type, width = 0.1)


# Not much correlation in control group?
feols_custom(
  r2_choose_trans ~ p_r1_choose_trans_group_excl,
  data = r2_w_discuss_het %>%
    filter(discuss_type == "control"),
  fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
  cluster = "group_id"
)


r1_n_choose_trans %>%
  filter(discuss_type=="control") %>%
  select(group_id, ind_id, n_r1_choose_trans, n_r1_choose_trans_ind, n_r1_choose_trans_group, n_r1_choose_trans_group_all, p_r1_choose_trans_group_excl)



# Heterogeneity graphs final ----------------------------------------------------------------------------------------------------------------

r2_w_discuss_het %>% count_prop(is_listener, discuss_type)

# 1. Confidence intervals for treatment groups, for the error bars
r2_w_discuss_het_conf_ints <- crossing(
  spoke_pro_trans_group_excl = seq(0, 1, 0.25),
  discuss_type = c("discussion_pair", "discussion_full")
) %>%
  mutate(
    model = map2(spoke_pro_trans_group_excl, discuss_type,
                 ~ feols_custom(r2_choose_trans ~ 1,
                                data = r2_w_discuss_het %>% filter(discuss_type == .y, !(is_listener %in% 0)) %>% filter(spoke_pro_trans_group_excl == .x),
                                cluster = "group_id")
    )
  ) %>%
  mutate(
    coeffs = map(model, tidy_90)
  ) %>%
  unnest(coeffs)

# 2. Confidence intervals for control group
r2_w_discuss_het_conf_ints_control <- feols_custom(r2_choose_trans ~ 1,
                                                   data = r2_w_discuss_het %>% filter(discuss_type == "control"),
                                                   cluster = "group_id") %>%
  tidy_90


# 3. Regression lines for treatment groups
get_ci_regression <- function(reg) {

  pred <-  predict(reg, newdata = tibble(spoke_pro_trans_group_excl = seq(0, 1, 0.01)), se = TRUE) %>%
    as_tibble() %>%
    mutate(spoke_pro_trans_group_excl = seq(0, 1, 0.01)) %>%
    print %>%
    mutate(
      reg_lower = fit - (qnorm(0.975) * se.fit),
      reg_upper = fit + (qnorm(0.975) * se.fit)
    )

  return(pred)
}

reg_discussion_full <- feols_custom(
  r2_choose_trans ~ spoke_pro_trans_group_excl,
  data = r2_w_discuss_het %>% filter(discuss_type == "discussion_full")
) %>%
  get_ci_regression() %>%
  mutate(discuss_type = "discussion_full")

reg_listener <- feols_custom(
  r2_choose_trans ~ spoke_pro_trans_group_excl,
  data = r2_w_discuss_het %>% filter(is_listener %in% 1)
) %>%
  get_ci_regression() %>%
  mutate(discuss_type = "discussion_pair")



r2_w_discuss_het

# Calculate sample sizes
n_full <- n_distinct(r2_w_discuss_het %>% filter(discuss_type == "discussion_full") %>% pull(ind_id))
n_listener <- n_distinct(r2_w_discuss_het %>% filter(is_listener %in% 1) %>% pull(ind_id))

# Add discuss_type to regression prediction data frames
reg_discussion_full <- reg_discussion_full %>%
  mutate(discuss_type = "discussion_full")

reg_listener <- reg_listener %>%
  mutate(discuss_type = "discussion_pair")

# Create a data frame for the control group line and CI across x-range
control_line <- tibble(
  spoke_pro_trans_group_excl = seq(0, 1, 0.01),
  estimate = r2_w_discuss_het_conf_ints_control$estimate[1],
  conf.low = r2_w_discuss_het_conf_ints_control$conf.low[1],
  conf.high = r2_w_discuss_het_conf_ints_control$conf.high[1]
)

# Convert estimate to a percentage string (e.g., 0.382 -> "38.2%")
control_pct <- paste0(round(control_line$estimate[1] * 100, 1), "%")

# Define custom facet labels including sample sizes
facet_labels <- as_labeller(c(
  "discussion_full" = paste0("3-person discussion (N=", n_full, ")"),
  "discussion_pair" = paste0("Listener (N=", n_listener, ")")
))

# Define custom colors
color_vals <- c("discussion_full" = "firebrick",
                "discussion_pair" = "forestgreen")


reg_discussion_full_stats <- models_spoke_pro_trans_group_excl[[1]] %>% tidy()


reg_discussion_full_stats %>% filter(term=="spoke_pro_trans_group_excl") %>% pull(estimate) %>% times_100 %>% {. / 4} %>% write_stat("outputs/stats/het_by_posneg_discussion_full_coeff.tex", digits = 1)

reg_listener_stats <- models_spoke_pro_trans_group_excl[[2]] %>% tidy()

reg_listener_stats %>% filter(term=="spoke_pro_trans_group_excl") %>% pull(estimate) %>% times_100 %>% {. / 4} %>% write_stat("outputs/stats/het_by_posneg_discussion_listener_coeff.tex", digits = 1)

ggplot() +
  # Control group CI ribbon (transparent fill, dotted outline)
  geom_ribbon(
    data = control_line,
    aes(x = spoke_pro_trans_group_excl, ymin = conf.low, ymax = conf.high),
    fill = "skyblue3", alpha = 0.07, linetype = "dotted", color = "skyblue3"
  ) +
  # Control group estimate line
  geom_line(
    data = control_line,
    aes(x = spoke_pro_trans_group_excl, y = estimate),
    color = "skyblue3", size = 1
  ) +

  # Annotate control group line
  # Positioning text slightly above the control line (e.g., y + 0.02)
  geom_text(
  data = tibble(discuss_type = "discussion_full"),
  aes(x = 0.7, y = control_line$estimate[1] - 0.064,
      label = paste("No discussion:", control_pct)),
  color = "skyblue3",
  size = 3.2
) +
  # Regression CI ribbons for discussion_full and discussion_pair
  geom_ribbon(
    data = reg_discussion_full,
    aes(x = spoke_pro_trans_group_excl, ymin = reg_lower, ymax = reg_upper, fill = discuss_type),
    alpha = 0.07, linetype = "dotted", color = "firebrick"
  ) +
  geom_ribbon(
    data = reg_listener,
    aes(x = spoke_pro_trans_group_excl, ymin = reg_lower, ymax = reg_upper, fill = discuss_type),
    alpha = 0.07, linetype = "dotted", color = "forestgreen"
  ) +

  # Regression lines
  geom_line(
    data = reg_discussion_full,
    aes(x = spoke_pro_trans_group_excl, y = fit, color = discuss_type),
    size = 1
  ) +
  geom_line(
    data = reg_listener,
    aes(x = spoke_pro_trans_group_excl, y = fit, color = discuss_type),
    size = 1
  ) +

  # Pointwise estimates and intervals from r2_w_discuss_het_conf_ints, colored by category, slightly opaque
  geom_point(
    data = r2_w_discuss_het_conf_ints %>%
      filter(discuss_type %in% c("discussion_full", "discussion_pair")),
    aes(x = spoke_pro_trans_group_excl,
        y = estimate,
        ymin = conf.low,
        ymax = conf.high,
        color = discuss_type),
    size = 3,
    alpha = 0.3
  ) +
  geom_errorbar(
    data = r2_w_discuss_het_conf_ints %>%
      filter(discuss_type %in% c("discussion_full", "discussion_pair")),
    aes(x = spoke_pro_trans_group_excl,
        y = estimate,
        ymin = conf.low,
        ymax = conf.high,
        color = discuss_type),
    size = 1,
    width = 0,
    alpha = 0.2
  ) +

  # Add coefficient annotations
  geom_text(
    data = tibble(discuss_type = "discussion_full",
                 x = 0.05, y = 0.1,
                 label = sprintf("Slope: %.2f (p<0.001)", 
                               reg_discussion_full_stats %>% filter(term == "spoke_pro_trans_group_excl") %>% pull(estimate))),
    aes(x = x, y = y, label = label),
    hjust = 0,
    size = 3
  ) +
  geom_text(
    data = tibble(discuss_type = "discussion_full", 
                 x = 0.05, y = 0.05,
                 label = sprintf("H₀ (intercept=control): p=%.2f",
                               reg_discussion_full_stats %>% filter(term == "discussion_full") %>% pull(p.value))),
    aes(x = x, y = y, label = label),
    hjust = 0,
    size = 3
  ) +
  geom_text(
    data = tibble(discuss_type = "discussion_pair",
                 x = 0.05, y = 0.10,
                 label = sprintf("Slope: %.2f (p<0.001)", 
                               reg_listener_stats %>% filter(term == "spoke_pro_trans_group_excl") %>% pull(estimate))),
    aes(x = x, y = y, label = label),
    hjust = 0,
    size = 3
  ) +
  geom_text(
    data = tibble(discuss_type = "discussion_pair", 
                 x = 0.05, y = 0.05,
                 label = sprintf("H₀ (intercept=control): p=%.2f",
                               reg_listener_stats %>% filter(term == "discussion_pair_listener") %>% pull(p.value))),
    aes(x = x, y = y, label = label),
    hjust = 0,
    size = 3
  ) +
  facet_wrap(~ discuss_type, labeller = facet_labels) +
  scale_color_manual(
    values = color_vals,
    name = NULL, # remove legend title
    breaks = c("discussion_full","discussion_pair"),
    labels = c("3-person discussion","Listener")
  ) +
  scale_fill_manual(
    values = color_vals,
    name = NULL, # remove legend title
    breaks = c("discussion_full","discussion_pair"),
    labels = c("3-person discussion","Listener")
  ) +
  theme_classic() +
  expand_limits(y = 0) +
  labs(
    x = "P(Other group members spoke pro-trans)",
    y = "P(Chose trans in outcome round)"
  )

ggsave("outputs/figs/het_by_posneg_discussion.pdf", width = 10.5, height = 5.5, scale = 0.6)

# Export stat
reg_listener_stats %>% filter(term == "discussion_pair_listener") %>% pull(p.value) %>% write_p_val(
  "outputs/stats/het_by_posneg_discussion_listener_pval.tex",
  digits = 2
)


# Look at heterogeneity in group-predictions by pos/neg discussions (similar graph) ----------------------------------------------------------------------------------------------

# Create merged dataset for group predictions analysis
group_predic_w_discuss <- group_predic %>%
  tidylog::left_join(gen_att_in_discussion, by = "ind_id", suffix = c("", "_extra")) %>%
  tidylog::left_join(spoke_in_favour_discuss, by = "ind_id", suffix = c("", "_extra")) %>%
  tidylog::left_join(discuss_obs_per_group %>% select(group_id, p_pos_mentions, p_neg_mentions), by = c("group_id")) %>%
  tidylog::left_join(r1_n_choose_trans, by = "ind_id", suffix = c("", "_extra")) %>%
  tidylog::left_join(r1_amount_discussed, by = "group_id", suffix = c("", "_extra")) %>%
  mutate(
    favourable_discussion_z = ((spoke_pro_trans_z + gobs9_z) / 2),
    favourable_discussion_z_group_excl = ((spoke_pro_trans_z_group_excl + gobs9_z_group_excl) / 2)
  )

# Calculate confidence intervals for treatment groups
group_predic_conf_ints <- crossing(
  spoke_pro_trans_group_excl = seq(0, 1, 0.25),
  discuss_type = c("discussion_pair", "discussion_full")
) %>%
  mutate(
    model = map2(spoke_pro_trans_group_excl, discuss_type,
                 ~ feols_custom(group_predic_choose_trans ~ 1,
                                data = group_predic_w_discuss %>% 
                                  filter(discuss_type == .y, !(is_listener %in% 0)) %>% 
                                  filter(spoke_pro_trans_group_excl == .x),
                                cluster = "group_id")
    )
  ) %>%
  mutate(
    coeffs = map(model, tidy_90)
  ) %>%
  unnest(coeffs)

# Calculate control group confidence intervals
group_predic_conf_ints_control <- feols_custom(group_predic_choose_trans ~ 1,
                                              data = group_predic_w_discuss %>% 
                                                filter(discuss_type == "control"),
                                              cluster = "group_id") %>%
  tidy_90

# Regression lines for treatment groups
reg_discussion_full_predic <- feols_custom(group_predic_choose_trans ~ spoke_pro_trans_group_excl,
                                         data = group_predic_w_discuss %>% 
                                           filter(discuss_type == "discussion_full"),
                                         cluster = "group_id") %>%
  get_ci_regression() %>%
  mutate(discuss_type = "discussion_full")

reg_listener_predic <- feols_custom(group_predic_choose_trans ~ spoke_pro_trans_group_excl,
                                  data = group_predic_w_discuss %>% 
                                    filter(is_listener %in% 1),
                                  cluster = "group_id") %>%
  get_ci_regression() %>%
  mutate(discuss_type = "discussion_pair")

# Calculate sample sizes
n_full_predic <- n_distinct(group_predic_w_discuss %>% 
                           filter(discuss_type == "discussion_full") %>% 
                           pull(ind_id))
n_listener_predic <- n_distinct(group_predic_w_discuss %>% 
                              filter(is_listener %in% 1) %>% 
                              pull(ind_id))

# Create control line data
control_line_predic <- tibble(
  spoke_pro_trans_group_excl = seq(0, 1, 0.01),
  estimate = group_predic_conf_ints_control$estimate[1],
  conf.low = group_predic_conf_ints_control$conf.low[1],
  conf.high = group_predic_conf_ints_control$conf.high[1]
)

# Models for group predictions heterogeneity
models_spoke_pro_trans_group_excl_predic <- list(
  "Discussion participants" =  feols_custom(
    group_predic_choose_trans ~ discussion_full * (spoke_pro_trans_group_excl + p_neg_mentions) + 
      video_type_placebo + video_type_treatment + factor(stratum_id) + 
      delivery_incentive_exp + item_diff + reliability_diff * reliability_shown,
    data = group_predic_w_discuss %>%
      filter(discuss_type %in% c("control", "discussion_full")) %>%
      filter(!is.na(group_predic_choose_trans)) %>%
      mutate(across(c(spoke_pro_trans_group_excl, p_neg_mentions), ~ ifelse(is.na(.x), 0, .x))),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "phase"),
    cluster = cluster,
    stratum = "stratum_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      group_control = TRUE
    ),
    var_types = var_types_spec
  ),
  "Listeners" =  feols_custom(
    group_predic_choose_trans ~ discussion_pair_listener * (spoke_pro_trans_group_excl + p_neg_mentions) + 
      video_type_placebo + video_type_treatment + factor(stratum_id) + 
      delivery_incentive_exp + item_diff + reliability_diff * reliability_shown,
    data = group_predic_w_discuss %>%
      filter(treat_type_r2 %in% c("control", "discussion_listener")) %>%
      filter(!is.na(group_predic_choose_trans)) %>%
      mutate(across(c(spoke_pro_trans_group_excl, p_neg_mentions), ~ ifelse(is.na(.x), 0, .x))),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "phase"),
    cluster = cluster,
    stratum = "stratum_id", 
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_pair_listener",
      group_control = TRUE
    ),
    var_types = var_types_spec
  )
)

# Extract slope and intercept statistics
slope_full <- models_spoke_pro_trans_group_excl_predic[[1]] %>% tidy() %>% filter(term == "spoke_pro_trans_group_excl") %>% pull(estimate)
intercept_full <- models_spoke_pro_trans_group_excl_predic[[1]] %>% tidy() %>% filter(term == "discussion_full") %>% pull(p.value)
slope_listener <- models_spoke_pro_trans_group_excl_predic[[2]] %>% tidy() %>% filter(term == "spoke_pro_trans_group_excl") %>% pull(estimate)
intercept_listener <- models_spoke_pro_trans_group_excl_predic[[2]] %>% tidy() %>% filter(term == "discussion_pair_listener") %>% pull(p.value)


# Export all the stats
intercept_listener %>% write_p_val("outputs/stats/het_by_posneg_discussion_listener_predic_intercept.tex", digits = 2)
intercept_full %>% write_p_val("outputs/stats/het_by_posneg_discussion_full_predic_intercept.tex", digits = 2)

# Convert control estimate to percentage
control_pct_predic <- paste0(round(control_line_predic$estimate[1] * 100, 1), "%")

# Define facet labels and colors (same as before)
facet_labels_predic <- as_labeller(c(
  "discussion_full" = paste0("3-person discussion (N=", n_full_predic, ")"),
  "discussion_pair" = paste0("Listener (N=", n_listener_predic, ")")
))

color_vals <- c("discussion_full" = "firebrick",
                "discussion_pair" = "forestgreen")

# Create the plot
ggplot() +
  # Control group CI ribbon
  geom_ribbon(
    data = control_line_predic,
    aes(x = spoke_pro_trans_group_excl, ymin = conf.low, ymax = conf.high),
    fill = "skyblue3", alpha = 0.07, linetype = "dotted", color = "skyblue3"
  ) +
  # Control group estimate line
  geom_line(
    data = control_line_predic,
    aes(x = spoke_pro_trans_group_excl, y = estimate),
    color = "skyblue3", size = 1
  ) +
  # Control group annotation
  geom_text(
    data = tibble(discuss_type = "discussion_full"),
    aes(x = 0.7, y = control_line_predic$estimate[1] - 0.064,
        label = paste("No discussion:", control_pct_predic)),
    color = "skyblue3",
    size = 3.2
  ) +
  # Regression CI ribbons
  geom_ribbon(
    data = reg_discussion_full_predic,
    aes(x = spoke_pro_trans_group_excl, ymin = reg_lower, ymax = reg_upper, fill = discuss_type),
    alpha = 0.07, linetype = "dotted", color = "firebrick"
  ) +
  geom_ribbon(
    data = reg_listener_predic,
    aes(x = spoke_pro_trans_group_excl, ymin = reg_lower, ymax = reg_upper, fill = discuss_type),
    alpha = 0.07, linetype = "dotted", color = "forestgreen"
  ) +
  # Regression lines
  geom_line(
    data = reg_discussion_full_predic,
    aes(x = spoke_pro_trans_group_excl, y = fit, color = discuss_type),
    size = 1
  ) +
  geom_line(
    data = reg_listener_predic,
    aes(x = spoke_pro_trans_group_excl, y = fit, color = discuss_type),
    size = 1
  ) +
  # Point estimates and intervals
  geom_point(
    data = group_predic_conf_ints %>%
      filter(discuss_type %in% c("discussion_full", "discussion_pair")),
    aes(x = spoke_pro_trans_group_excl,
        y = estimate,
        ymin = conf.low,
        ymax = conf.high,
        color = discuss_type),
    size = 3,
    alpha = 0.3
  ) +
  geom_errorbar(
    data = group_predic_conf_ints %>%
      filter(discuss_type %in% c("discussion_full", "discussion_pair")),
    aes(x = spoke_pro_trans_group_excl,
        y = estimate,
        ymin = conf.low,
        ymax = conf.high,
        color = discuss_type),
    size = 1,
    width = 0,
    alpha = 0.2
  ) +
  # Coefficient annotations
   # Update coefficient annotations to include both slope and intercept tests
  geom_text(
    data = tibble(discuss_type = "discussion_full",
                 x = 0.05, y = 0.1,
                 label = sprintf("Slope: %.2f (p<0.001)", slope_full)),
    aes(x = x, y = y, label = label),
    hjust = 0,
    size = 3
  ) +
  geom_text(
    data = tibble(discuss_type = "discussion_full", 
                 x = 0.05, y = 0.05,
                 label = sprintf("H₀ (intercept=control): p=%.2f", intercept_full)),
    aes(x = x, y = y, label = label),
    hjust = 0,
    size = 3
  ) +
  geom_text(
    data = tibble(discuss_type = "discussion_pair",
                 x = 0.05, y = 0.1,
                 label = sprintf("Slope: %.2f (p<0.001)", slope_listener)),
    aes(x = x, y = y, label = label),
    hjust = 0,
    size = 3
  ) +
  geom_text(
    data = tibble(discuss_type = "discussion_pair", 
                 x = 0.05, y = 0.05,
                 label = sprintf("H₀ (intercept=control): p=%.2f", intercept_listener)),
    aes(x = x, y = y, label = label),
    hjust = 0,
    size = 3
  ) + 
  facet_wrap(~ discuss_type, labeller = facet_labels_predic) +
  scale_color_manual(
    values = color_vals,
    name = NULL,
    breaks = c("discussion_full","discussion_pair"),
    labels = c("3-person discussion","Listener")
  ) +
  scale_fill_manual(
    values = color_vals,
    name = NULL,
    breaks = c("discussion_full","discussion_pair"),
    labels = c("3-person discussion","Listener")
  ) +
  theme_classic() +
  expand_limits(y = 0) +
  labs(
    x = "P(Other group members spoke pro-trans)",
    y = "P(Predicted group would choose trans)"
  )

# Save the plot
ggsave("outputs/figs/het_by_posneg_discussion_predictions.pdf", width = 10.5, height = 5.5, scale = 0.6)

# Export results
models_spoke_pro_trans_group_excl_predic %>%
  tex_export(
    coef_rename = coef_label,
    gof_map = fe_label_no_fe,
    coef_omit = vars_to_regex(c("stratum_id", "Intercept", "video", "group_control", 
                               "pair_includes_trans_alt$", "phase", control_vars, "delivery")),
    dep_means = list(
      "Mean: No discussion (private) + worker is non-trans" = "discuss_type == 'control' & pair_includes_trans == FALSE",
      "Mean: No discussion (private) + worker is trans" = "discuss_type == 'control' & pair_includes_trans == TRUE"
    )
  )

# Save key statistics
models_spoke_pro_trans_group_excl_predic[[1]] %>% get_coeff("discussion_full") %>%
  times_100 %>%
  write_stat("outputs/stats/silent_discussion_predic_coeff.tex", digits = 0)

models_spoke_pro_trans_group_excl_predic[[1]] %>% get_p_val("discussion_full") %>%
  write_p_val("outputs/stats/silent_discussion_predic_p.tex", digits = 2)

models_spoke_pro_trans_group_excl_predic[[2]] %>% get_coeff("discussion_pair_listener") %>%
  times_100 %>%
  write_stat("outputs/stats/silent_discussion_listener_predic_coeff.tex", digits = 0)

models_spoke_pro_trans_group_excl_predic[[2]] %>% get_p_val("discussion_pair_listener") %>%
  write_p_val("outputs/stats/silent_discussion_listener_predic_p.tex", digits = 2)
